# 导入一个处理word文档的库
import docx

# Open the Word document that contains the lesson translations.
file = docx.Document("C:\\Users\\Jutt\\Downloads\\六年级上册课文翻译.docx")
# Report the paragraph count (one paragraph per hard return in the document).
print(f"段落数:{len(file.paragraphs)}")

# Number under which the first collected unit is stored.
danyuanIndex = 73

# Text that marks the start of a unit (lesson).
str1 = "第一课"

# Keywords that mark the start of a new page inside a unit.
# Note that "Ask " deliberately keeps its trailing space.
str2, str3, str4, str5, str6 = "Let", "Listen", "Read", "Say", "Ask "
strList = [str2, str3, str4, str5, str6]

# Flatten the document into a list of text lines.
# A paragraph's text may contain embedded "\n"; each piece becomes its own
# entry. Paragraphs whose first piece is empty are dropped entirely.
newDuanluo = []
for paragraph in file.paragraphs:
    pieces = paragraph.text.split('\n')
    if not pieces[0]:
        continue
    newDuanluo.extend(pieces)
print(newDuanluo)
# Chinese numerals one through thirteen.
yidaoshi = [
    "一", "二", "三", "四", "五", "六", "七",
    "八", "九", "十", "十一", "十二", "十三",
]
# Lesson headings built from the numerals: "第一课", "第二课", ...
newYi = ["第" + numeral + "课" for numeral in yidaoshi]
print(newYi)
# Group the flattened lines into units and pages.
#
#   book    : {unit number (str) -> unit}
#   danyuan : the unit being filled, {page number (str) -> list of lines}
#   jishuqi : page counter inside the current unit; it goes up by one for
#             every line that contains a page marker from strList
#   arr     : lines collected for the current page
#
# Lines seen before the first page marker of a unit are kept together with
# the first page (or under page "0" when the unit has no marker at all).
book = {}
danyuan = {}
jishuqi = 0
arr = []
for line in newDuanluo:
    # A lesson heading closes the unit collected so far and starts a new one.
    # any() makes sure a line is handled once even if it matches more than
    # one heading.
    if any(heading in line for heading in newYi):
        print(line)
        danyuan[str(jishuqi)] = arr
        book[str(danyuanIndex)] = danyuan
        danyuanIndex += 1
        danyuan = {}
        arr = []
        jishuqi = 0
        # NOTE(review): the heading line is intentionally not skipped here; it
        # falls through and becomes the first line of the new unit, which is
        # what the previous code actually did (its `continue` only affected
        # the inner loop and therefore skipped nothing).

    # A page marker starts a new page. Counting once per line (instead of
    # once per matching marker) prevents a line such as one containing both
    # "Listen" and "Say" from advancing the counter twice and leaving an
    # empty page behind.
    if any(marker in line for marker in strList):
        jishuqi += 1
        if jishuqi > 1:
            # Store the page that just ended and begin collecting a new one.
            danyuan[str(jishuqi - 1)] = arr
            arr = []

    arr.append(line)

# Flush the last page and the last unit.
danyuan[str(jishuqi)] = arr
book[str(danyuanIndex)] = danyuan
danyuanIndex += 1
print(book)
